This notebook collects various functions for computing statistics on slides and pyramids.


In [70]:
import pymongo
# One connection to the local MongoDB server, reused by the cells below.
client = pymongo.MongoClient(host='localhost', port=27017)

# Database whose collections are counted by the slide-statistics cell.
db_ptr = client['TCGA_Slide_DB']

# Database holding load-error bookkeeping (e.g. the Pyramid_Dups collection).
load_errors_db = client['CDSA_LoadErrors']

In [106]:
## COUNT SLIDES IN EACH COLLECTION, BROKEN DOWN BY PREP TYPE
# Skip every MongoDB-internal collection (names starting with "system."),
# not only system.indexes, so none of them is reported as a slide collection.
# NOTE(review): collection_names() and Collection.count() are deprecated in
# newer PyMongo releases; they are kept because this notebook was written
# against an older driver -- confirm the installed version before upgrading.
collection_list = [name for name in db_ptr.collection_names()
                   if not name.startswith('system.')]
print(collection_list)

# collection name -> {'total', 'frozen_sects', 'diag_sects', 'unknown_sects'}
svs_summary_dict = {}

for cl in collection_list:
    slides = db_ptr[cl]

    total_slides = slides.count()
    frozen_sects = slides.count({'prep_type': 'tissue_image'})
    diag_sects = slides.count({'prep_type': 'diagnostic'})
    unknown_sects = slides.count({'prep_type': 'Unknown'})

    # Keep every number that is printed, so later cells can reuse the totals
    # and unknown counts without re-querying the database.
    svs_summary_dict[cl] = {
        'total': total_slides,
        'frozen_sects': frozen_sects,
        'diag_sects': diag_sects,
        'unknown_sects': unknown_sects,
    }

    # Single-argument print(...) gives the same output on Python 2 and 3.
    print("%s total: %d frozen sects: %d diagnostic: %d unknown: %d"
          % (cl, total_slides, frozen_sects, diag_sects, unknown_sects))


[u'prad', u'acc', u'gbm', u'lgg', u'coad', u'luad', u'lusc', u'meso', u'cesc', u'ucec', u'read', u'thca', u'brca', u'kirc', u'kirp', u'sarc', u'paad', u'dlbc', u'pcpg', u'blca', u'thym', u'ucs', u'kich', u'stad', u'chol', u'hnsc', u'ov', u'tgct', u'skcm', u'esca', u'lihc']
prad total: 1278 frozen sects: 770 diagnostic: 508 unknown: 0
acc total: 329 frozen sects: 96 diagnostic: 233 unknown: 0
gbm total: 2503 frozen sects: 1435 diagnostic: 1068 unknown: 0
lgg total: 1776 frozen sects: 866 diagnostic: 910 unknown: 0
coad total: 2014 frozen sects: 1547 diagnostic: 467 unknown: 0
luad total: 2104 frozen sects: 1540 diagnostic: 564 unknown: 0
lusc total: 2057 frozen sects: 1478 diagnostic: 579 unknown: 0
meso total: 183 frozen sects: 88 diagnostic: 95 unknown: 0
cesc total: 724 frozen sects: 442 diagnostic: 282 unknown: 0
ucec total: 2142 frozen sects: 1547 diagnostic: 595 unknown: 0
read total: 906 frozen sects: 744 diagnostic: 162 unknown: 0
thca total: 1384 frozen sects: 834 diagnostic: 550 unknown: 0
brca total: 5307 frozen sects: 4126 diagnostic: 1181 unknown: 0
kirc total: 3748 frozen sects: 3182 diagnostic: 566 unknown: 0
kirp total: 887 frozen sects: 574 diagnostic: 313 unknown: 0
sarc total: 891 frozen sects: 289 diagnostic: 602 unknown: 0
paad total: 512 frozen sects: 297 diagnostic: 215 unknown: 0
dlbc total: 110 frozen sects: 59 diagnostic: 51 unknown: 0
pcpg total: 391 frozen sects: 192 diagnostic: 199 unknown: 0
blca total: 1091 frozen sects: 630 diagnostic: 461 unknown: 0
thym total: 317 frozen sects: 137 diagnostic: 180 unknown: 0
ucs total: 158 frozen sects: 63 diagnostic: 95 unknown: 0
kich total: 335 frozen sects: 214 diagnostic: 121 unknown: 0
stad total: 1643 frozen sects: 1209 diagnostic: 434 unknown: 0
chol total: 90 frozen sects: 51 diagnostic: 39 unknown: 0
hnsc total: 1386 frozen sects: 884 diagnostic: 502 unknown: 0
ov total: 1599 frozen sects: 1492 diagnostic: 107 unknown: 0
tgct total: 410 frozen sects: 156 diagnostic: 254 unknown: 0
skcm total: 981 frozen sects: 480 diagnostic: 501 unknown: 0
esca total: 396 frozen sects: 238 diagnostic: 158 unknown: 0
lihc total: 1080 frozen sects: 701 diagnostic: 379 unknown: 0

In [77]:
# Grand totals across every collection summarised in svs_summary_dict.
diag_slides = sum(counts['diag_sects'] for counts in svs_summary_dict.values())
frozen_sects = sum(counts['frozen_sects'] for counts in svs_summary_dict.values())

# Prints "<diagnostic total> <frozen total>" on one line.
print("%d %d" % (diag_slides, frozen_sects))


8868 19855

In [85]:
# Handle on the load-error database; list its collections first.
error_db = client['CDSA_LoadErrors']
print(error_db.collection_names())

# Distinct full paths recorded in the corrupt_slides collection, one per line.
cs_l = error_db['corrupt_slides'].distinct('full_file_name')
for corrupt_path in cs_l:
    print(corrupt_path)


[u'system.indexes', u'rescanned_slides', u'corrupt_slides', u'Pyramid_Dups']
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/acc/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_ACC.diagnostic_images.Level_1.304.3.0/.TCGA-OR-A5JL-01Z-00-DX3.89E8DA21-12FB-4263-8452-B64F0ED558D8.svs.9gSgQu
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/acc/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_ACC.diagnostic_images.Level_1.304.3.0/.TCGA-OR-A5JP-01Z-00-DX1.15ED1958-011D-4247-91A3-99893466935D.svs.vSanGz
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/gbm/bcr/intgen.org/diagnostic_images/slide_images/intgen.org_GBM.diagnostic_images.Level_1.5.1.0/TCGA-02-0321-01Z-00-DX1.3c57c27d-4a4f-45af-bf8b-2bde421a42fa.svs.dzi.tif
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/coad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_COAD.diagnostic_images.Level_1.138.10.0/TCGA-AA-3489-01Z-00-DX1.AE299B70-B14C-4FFE-B1F9-38B2EB267FA9.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/coad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_COAD.diagnostic_images.Level_1.138.10.0/TCGA-AA-3511-01Z-00-DX1.F66F89C7-147D-4EE9-A482-61C3033EF443.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/coad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_COAD.diagnostic_images.Level_1.138.10.0/TCGA-AA-3496-01Z-00-DX1.B109A6F3-02E0-4181-B69A-00CBA758C074.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/coad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_COAD.diagnostic_images.Level_1.138.9.0/TCGA-AA-3489-01Z-00-DX1.AE299B70-B14C-4FFE-B1F9-38B2EB267FA9.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/coad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_COAD.diagnostic_images.Level_1.138.9.0/TCGA-AA-3511-01Z-00-DX1.F66F89C7-147D-4EE9-A482-61C3033EF443.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/coad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_COAD.diagnostic_images.Level_1.138.9.0/TCGA-AA-3496-01Z-00-DX1.B109A6F3-02E0-4181-B69A-00CBA758C074.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/coad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_COAD.diagnostic_images.Level_1.138.8.0/TCGA-AA-3489-01Z-00-DX1.AE299B70-B14C-4FFE-B1F9-38B2EB267FA9.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/coad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_COAD.diagnostic_images.Level_1.138.8.0/TCGA-AA-3511-01Z-00-DX1.F66F89C7-147D-4EE9-A482-61C3033EF443.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/coad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_COAD.diagnostic_images.Level_1.138.8.0/TCGA-AA-3496-01Z-00-DX1.B109A6F3-02E0-4181-B69A-00CBA758C074.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/coad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_COAD.diagnostic_images.Level_1.76.3.0/TCGA-AA-3506-01Z-00-DX1.08CFD143-1A4C-4262-831E-79D9C4BBF453.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/coad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_COAD.diagnostic_images.Level_1.76.3.0/TCGA-AA-3488-01Z-00-DX1.EDF60198-F7AB-45BB-9A1B-C2E2FA141989.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/coad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_COAD.diagnostic_images.Level_1.76.3.0/TCGA-AA-3510-01Z-00-DX1.B4FCE76A-3B60-4D7D-9F3E-973AE17EA8E2.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/coad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_COAD.diagnostic_images.Level_1.76.3.0/TCGA-AA-3495-01Z-00-DX1.67DEE36B-724E-4B4F-B3A9-B4E8CCCEFA80.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/coad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_COAD.diagnostic_images.Level_1.76.3.0/TCGA-AA-3492-01Z-00-DX1.32D79909-71D5-4843-847E-AECA5DBC963D.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/coad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_COAD.diagnostic_images.Level_1.76.3.0/TCGA-AA-3509-01Z-00-DX1.EAE46823-3132-486F-8C2A-C0F548A08335.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/coad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_COAD.diagnostic_images.Level_1.76.3.0/TCGA-AA-3494-01Z-00-DX1.E275AF20-3AA7-4191-BD1F-FFE744CA6A2F.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/coad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_COAD.diagnostic_images.Level_1.116.8.0/TCGA-AA-3655-01Z-00-DX1.D78D8DBE-E74F-491D-AC9C-10E4C4E7BB02.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/coad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_COAD.diagnostic_images.Level_1.116.8.0/TCGA-AA-3712-01Z-00-DX1.00E0ACE2-8CC5-4063-9C65-3CDD7F21B189.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/coad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_COAD.diagnostic_images.Level_1.116.8.0/TCGA-AA-3713-01Z-00-DX1.8148ACEB-7C1E-4D29-B908-F3729657EA4F.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/coad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_COAD.diagnostic_images.Level_1.116.8.0/TCGA-AA-3660-01Z-00-DX1.CCD0F50D-9991-4CC2-AC77-AD1F78D8CFEB.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/coad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_COAD.diagnostic_images.Level_1.116.8.0/TCGA-AA-3662-01Z-00-DX1.625F1BCC-5E59-411E-AE23-6F43CE6122B2.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/coad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_COAD.diagnostic_images.Level_1.116.8.0/TCGA-AA-3697-01Z-00-DX1.AAB8DB74-F76D-4D0A-A50E-E7F97504A3C4.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/coad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_COAD.diagnostic_images.Level_1.116.8.0/TCGA-AA-3663-01Z-00-DX1.9AEDC003-2062-4876-8993-A5CEE4DDE1A9.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/luad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_LUAD.diagnostic_images.Level_1.119.12.0/TCGA-05-4425-01Z-00-DX1.82B093EE-49BC-4FD9-91AC-4CC89944309D.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/luad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_LUAD.diagnostic_images.Level_1.119.12.0/TCGA-05-4384-01Z-00-DX1.CA68BF29-BBE3-4C8E-B48B-554431A9EE13.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/luad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_LUAD.diagnostic_images.Level_1.119.12.0/TCGA-05-4390-01Z-00-DX1.858E64DF-DD3E-4F43-B7C1-CE35B33F1C90.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/luad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_LUAD.diagnostic_images.Level_1.238.4.0/TCGA-97-8547-01Z-00-DX1.873DFBAA-2875-481F-BF47-2C12B1D96CE6.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/luad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_LUAD.diagnostic_images.Level_1.238.4.0/TCGA-97-8176-01Z-00-DX1.444FE201-4F7B-47B5-8FE5-6CB31E524E6E.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/luad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_LUAD.diagnostic_images.Level_1.238.1.0/TCGA-97-8547-01Z-00-DX1.873DFBAA-2875-481F-BF47-2C12B1D96CE6.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/luad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_LUAD.diagnostic_images.Level_1.238.1.0/TCGA-97-8176-01Z-00-DX1.444FE201-4F7B-47B5-8FE5-6CB31E524E6E.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/luad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_LUAD.diagnostic_images.Level_1.144.15.0/TCGA-05-4410-01Z-00-DX1.E5B66334-4949-4F45-9200-296B1A2F1AD5.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/luad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_LUAD.diagnostic_images.Level_1.84.13.0/TCGA-05-5425-01Z-00-DX1.85865B2F-4888-43DD-A501-458BEFCF832B.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/luad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_LUAD.diagnostic_images.Level_1.84.13.0/TCGA-05-5420-01Z-00-DX1.8C253A99-44FD-48B6-AF31-D808CCB7DB1E.svs
/PYRAMIDS/PYRAMIDS/CDSA/ESCA_Frozen/nationwidechildrens.org_ESCA.tissue_images.Level_1.298.0.0/TCGA-LN-A4MR-01A-01-TSA.09941ABA-35AD-415C-BE4F-BFACB60BFD8C.svs.dzi.tif
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/luad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_LUAD.diagnostic_images.Level_1.84.13.0/TCGA-05-5423-01Z-00-DX1.CCCF5FDB-ACAD-4D9D-80DF-556F0D6284AF.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/luad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_LUAD.diagnostic_images.Level_1.84.13.0/TCGA-05-5428-01Z-00-DX1.8018AD62-F1CE-4BFF-8EFD-3F2D4513FC11.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/luad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_LUAD.diagnostic_images.Level_1.84.13.0/TCGA-05-5715-01Z-00-DX1.D3F0A1FA-2507-45FF-823F-F9981E62BB4C.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/luad/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_LUAD.diagnostic_images.Level_1.84.13.0/TCGA-05-5429-01Z-00-DX1.20729065-FADA-4E43-98D7-AFA5FB4A0447.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/cesc/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_CESC.diagnostic_images.Level_1.127.4.0/TCGA-DS-A1OA-01Z-00-DX1.2452ACCB-9788-44C3-8041-3D9E5A3620BD.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/cesc/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_CESC.diagnostic_images.Level_1.200.7.0/.TCGA-FU-A3HZ-01Z-00-DX1.1E78D8EF-B1FF-49AC-9EC4-D6CC2CD5D170.svs.qhJFlT
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/cesc/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_CESC.diagnostic_images.Level_1.127.5.0/TCGA-DS-A1OA-01Z-00-DX1.2452ACCB-9788-44C3-8041-3D9E5A3620BD.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/cesc/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_CESC.diagnostic_images.Level_1.127.5.0/TCGA-DS-A1OB-01Z-00-DX1.F3E3B423-D5E3-463A-BE95-76C2ADD14915.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/cesc/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_CESC.diagnostic_images.Level_1.127.3.0/TCGA-DS-A1OA-01Z-00-DX1.2452ACCB-9788-44C3-8041-3D9E5A3620BD.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/cesc/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_CESC.diagnostic_images.Level_1.127.3.0/TCGA-DS-A1OB-01Z-00-DX1.F3E3B423-D5E3-463A-BE95-76C2ADD14915.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/ucec/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_UCEC.diagnostic_images.Level_1.186.4.0/TCGA-E6-A2P9-01Z-00-DX1.CFE4D87E-45D6-40ED-9E87-E80E1A63D04B.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/read/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_READ.diagnostic_images.Level_1.102.7.0/TCGA-AG-3742-01Z-00-DX1.E662D7A7-1022-4120-BCB8-845732E07D63.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/read/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_READ.diagnostic_images.Level_1.102.7.0/TCGA-AG-3732-01Z-00-DX1.5EC57511-2B19-4005-BCA0-333C387C66E6.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/read/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_READ.diagnostic_images.Level_1.122.5.0/TCGA-AG-3592-01Z-00-DX1.61880D0C-B777-482B-9543-FCA685731B9B.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/read/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_READ.diagnostic_images.Level_1.122.5.0/TCGA-AG-3591-01Z-00-DX1.C06D77F2-D55E-433E-A84D-41E6273C8C61.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/read/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_READ.diagnostic_images.Level_1.122.5.0/TCGA-AG-3731-01Z-00-DX1.9C30A84C-DD47-47C4-8899-3A1EF5153E1A.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/read/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_READ.diagnostic_images.Level_1.122.5.0/TCGA-AG-3725-01Z-00-DX1.86CF8220-F410-49E4-A840-E42AE8C5D8B2.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/read/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_READ.diagnostic_images.Level_1.46.7.0/TCGA-AG-4009-01Z-00-DX1.A66BDD56-216C-4424-8168-9295A307F98F.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/read/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_READ.diagnostic_images.Level_1.46.7.0/TCGA-AG-3891-01Z-00-DX1.82E8836A-91E7-48CA-A7C1-501A00F20298.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/brca/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_BRCA.diagnostic_images.Level_1.379.7.0/TCGA-3C-AALI-01Z-00-DX1.F6E9A5DF-D8FB-45CF-B4BD-C6B76294C291.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/brca/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_BRCA.diagnostic_images.Level_1.296.5.0/TCGA-OL-A5RW-01Z-00-DX1.E16DE8EE-31AF-4EAF-A85F-DB3E3E2C3BFF.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/brca/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_BRCA.diagnostic_images.Level_1.296.5.0/TCGA-OL-A5S0-01Z-00-DX1.49A7AC9D-C186-406C-BA67-2D73DE82E13B.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/brca/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_BRCA.diagnostic_images.Level_1.296.5.0/TCGA-OL-A5RX-01Z-00-DX1.15A0D4F4-2744-4D44-8883-27FF83D9C824.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/brca/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_BRCA.diagnostic_images.Level_1.296.5.0/TCGA-OL-A5RV-01Z-00-DX1.920AC243-1DAC-4854-BEB6-1CBCC950F26B.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/brca/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_BRCA.diagnostic_images.Level_1.296.5.0/TCGA-OL-A5RY-01Z-00-DX1.AE4E9D74-FC1C-4C1E-AE6D-5DF38899BBA6.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/brca/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_BRCA.diagnostic_images.Level_1.296.5.0/TCGA-OL-A5RU-01Z-00-DX1.A48CAF2D-9310-4611-B27D-400F3A324607.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/brca/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_BRCA.diagnostic_images.Level_1.296.5.0/TCGA-OL-A5RZ-01Z-00-DX1.6394C05E-1C34-4F4B-8859-F5E961E7EFF9.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/brca/bcr/nationwidechildrens.org/tissue_images/slide_images/nationwidechildrens.org_BRCA.tissue_images.Level_1.47.6.0/TCGA-A8-A06U-01A-01-TS1.63824040-373f-4c6c-a74e-881c127567a6.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/brca/bcr/nationwidechildrens.org/tissue_images/slide_images/nationwidechildrens.org_BRCA.tissue_images.Level_1.47.4.0/TCGA-A8-A06U-01A-01-TS1.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/brca/bcr/nationwidechildrens.org/tissue_images/slide_images/nationwidechildrens.org_BRCA.tissue_images.Level_1.47.7.0/TCGA-A8-A06U-01A-01-TS1.63824040-373f-4c6c-a74e-881c127567a6.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/brca/bcr/nationwidechildrens.org/tissue_images/slide_images/nationwidechildrens.org_BRCA.tissue_images.Level_1.47.8.0/TCGA-A8-A06U-01A-01-TS1.63824040-373f-4c6c-a74e-881c127567a6.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/brca/bcr/nationwidechildrens.org/tissue_images/slide_images/nationwidechildrens.org_BRCA.tissue_images.Level_1.47.5.0/TCGA-A8-A06U-01A-01-TS1.63824040-373f-4c6c-a74e-881c127567a6.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/brca/bcr/nationwidechildrens.org/tissue_images/slide_images/nationwidechildrens.org_BRCA.tissue_images.Level_1.96.1.0/TCGA-AR-A1AI-01A-01-TSA.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/brca/bcr/nationwidechildrens.org/tissue_images/slide_images/nationwidechildrens.org_BRCA.tissue_images.Level_1.96.0.0/TCGA-AR-A1AI-01A-01-TSA.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/kirp/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_KIRP.diagnostic_images.Level_1.432.2.0/TCGA-UZ-A9PQ-01Z-00-DX1.C2CB0E94-2548-4399-BCAB-E4D556D533EF.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/kirp/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_KIRP.diagnostic_images.Level_1.432.4.0/TCGA-UZ-A9PQ-01Z-00-DX1.C2CB0E94-2548-4399-BCAB-E4D556D533EF.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/kirp/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_KIRP.diagnostic_images.Level_1.432.3.0/TCGA-UZ-A9PQ-01Z-00-DX1.C2CB0E94-2548-4399-BCAB-E4D556D533EF.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/kirp/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_KIRP.diagnostic_images.Level_1.432.1.0/TCGA-UZ-A9PQ-01Z-00-DX1.C2CB0E94-2548-4399-BCAB-E4D556D533EF.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/kirp/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_KIRP.diagnostic_images.Level_1.432.0.0/TCGA-UZ-A9PQ-01Z-00-DX1.C2CB0E94-2548-4399-BCAB-E4D556D533EF.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/sarc/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_SARC.diagnostic_images.Level_1.307.1.0/.TCGA-JV-A5VE-01Z-00-DX1.C4538A0E-F16A-4C51-A980-D9DAB33E250F.svs.WyiMAz
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/blca/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_BLCA.diagnostic_images.Level_1.235.5.0/TCGA-CU-A3QU-01Z-00-DX1.661F12B3-2865-468B-828F-663B9E9B5DDE.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/blca/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_BLCA.diagnostic_images.Level_1.235.1.0/TCGA-CU-A3QU-01Z-00-DX1.661F12B3-2865-468B-828F-663B9E9B5DDE.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/thym/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_THYM.diagnostic_images.Level_1.431.4.0/TCGA-4X-A9F9-01Z-00-DXD.ED7DB698-C766-4E84-816A-1DAF551BC67F.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/thym/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_THYM.diagnostic_images.Level_1.431.6.0/TCGA-4X-A9F9-01Z-00-DXD.ED7DB698-C766-4E84-816A-1DAF551BC67F.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/thym/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_THYM.diagnostic_images.Level_1.431.5.0/TCGA-4X-A9F9-01Z-00-DXD.ED7DB698-C766-4E84-816A-1DAF551BC67F.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/thym/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_THYM.diagnostic_images.Level_1.431.7.0/TCGA-4X-A9F9-01Z-00-DXD.ED7DB698-C766-4E84-816A-1DAF551BC67F.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/thym/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_THYM.diagnostic_images.Level_1.431.3.0/TCGA-4X-A9F9-01Z-00-DXD.ED7DB698-C766-4E84-816A-1DAF551BC67F.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/thym/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_THYM.diagnostic_images.Level_1.431.8.0/TCGA-4X-A9F9-01Z-00-DXD.ED7DB698-C766-4E84-816A-1DAF551BC67F.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/thym/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_THYM.diagnostic_images.Level_1.431.10.0/TCGA-4X-A9F9-01Z-00-DXD.ED7DB698-C766-4E84-816A-1DAF551BC67F.svs
/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/thym/bcr/nationwidechildrens.org/diagnostic_images/slide_images/nationwidechildrens.org_THYM.diagnostic_images.Level_1.431.9.0/TCGA-4X-A9F9-01Z-00-DXD.ED7DB698-C766-4E84-816A-1DAF551BC67F.svs

In [5]:
# Root of the directory tree walked by the .svs timing experiments below.
# NOTE(review): despite the "gbm" in the name, this currently points at the
# COAD tree and already ends in bcr/ -- confirm this is the intended target.
gbm_path_test = '/TCGA_MIRROR/tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/coad/bcr/'
import os
import timeit

import time

class Timer(object):
    """Context manager that measures the wall-clock duration of its ``with`` body.

    Usage::

        with Timer() as t:
            do_work()
        print(t.interval)

    Attributes set during use:
        start: timer reading taken when the block is entered.
        end: timer reading taken when the block is left.
        interval: ``end - start``, in seconds.
    """

    def __enter__(self):
        # timeit.default_timer is a wall-clock timer on every platform.
        # time.clock() reported CPU time on Unix, which under-counts I/O-bound
        # work such as a directory walk, and was removed in Python 3.8.
        self.start = timeit.default_timer()
        return self

    def __exit__(self, *args):
        # Runs even when the body raises, so interval is always set; returning
        # None lets any exception from the body propagate.
        self.end = timeit.default_timer()
        self.interval = self.end - self.start

        
# Time how long it takes to collect every .svs file name under gbm_path_test.
with Timer() as t:
    my_svs_list = []
    for _root, _dirs, filenames in os.walk(gbm_path_test):
        # Only bare file names are kept; the directory part is discarded.
        my_svs_list.extend(name for name in filenames if name.endswith('.svs'))
print('Request took %.03f sec.' % t.interval)
        
### Should probably at least add the bcr/ directory to the path so the walk doesn't search all the other paths.
## Searching just GBM/bcr/intgen took 3.61 seconds.
## Searching all of GBM took 46 seconds.
## Adding bcr/ to the path took 32.576 seconds, so it is definitely quicker.


Request took 43.993 sec.

In [ ]:
### Need to add a function that inserts metadata if the key isn't there,
## and add a FORCE_UPDATE flag for when the new tag doesn't match the old data (in the case of a dictionary).
        
# Restrict the walk to the bcr/ subtree. gbm_path_test may already end in
# bcr/ (it does as currently defined), so only append the component when it
# is missing. Blind concatenation produced ".../bcr/bcr/", and os.walk yields
# nothing (without raising) for a directory it cannot list.
if os.path.basename(gbm_path_test.rstrip('/')) == 'bcr':
    bcr_root = gbm_path_test
else:
    bcr_root = os.path.join(gbm_path_test, 'bcr/')

with Timer() as t:
    my_svs_list = [f for root, dirs, files in os.walk(bcr_root)
                   for f in files if f.endswith('.svs')]
print('Request took %.03f sec.' % t.interval)

In [ ]:
#db_ptr['prad'].find_one({'slide_name': 'TCGA-G9-7523-01A-01-TS1.946b3311-92c7-4023-aed3-16d4280a9725.svs'})

In [105]:
## GET SOME INFO ON DUPLICATE PYRAMIDS-- PROBABLY CAN JUST DELETE THEM
import os,sys
# Every record of a duplicated pyramid in the load-error database.
dup_cur = load_errors_db['Pyramid_Dups'].find()

# Running total, in bytes, of the duplicated pyramid files found on disk.
dup_pyramids = 0

# The with-block closes the listing even if the loop fails. Previously the
# file was truncated on every run but never written to or closed.
with open('pyrs_that_are_dupes.txt', 'w') as f_del_out:
    for dc in dup_cur:
        print(dc)
        dup_path = dc['pyr_thats_duped']
        # One duplicated pyramid path per line, for later review/deletion.
        f_del_out.write(dup_path + '\n')
        try:
            dup_pyramids += os.path.getsize(dup_path)
        except OSError as err:
            # A file that is already gone or unreadable should not abort the
            # whole tally; report it and continue with the next record.
            print("could not stat %s: %s" % (dup_path, err))
print(dup_pyramids)


0

In [92]:
# Full path of one pyramid file, used to probe the pyramid database.
test_me = '/PYRAMIDS/PYRAMIDS/CDSA/DLBC_Frozen/intgen.org_DLBC.tissue_images.Level_1.248.0.0/TCGA-FA-8693-01A-01-BS1.263ab76a-8168-4f72-8147-8c2a44d0948b.svs.dzi.tif'
pyr_name = ''

# Look the pyramid up by its full path in the DLBC_Frozen collection and show
# the stored document (None is printed when there is no match).
pyramid_db = client['CDSA_Pyramid_DB']
dlbc_frozen_coll = pyramid_db['DLBC_Frozen']
pyramid_doc = dlbc_frozen_coll.find_one({'pyramid_w_path': test_me})
print(pyramid_doc)


{u'slide_md5': None, u'pyramid_name': u'TCGA-FA-8693-01A-01-BS1.263ab76a-8168-4f72-8147-8c2a44d0948b.svs.dzi.tif', u'height': 87735, u'width': 73304, u'pyramid_properties': {u'openslide_level[1]_width': u'36652', u'openslide_level[8]_tile-height': u'256', u'openslide_level[0]_tile-height': u'256', u'tiff_XResolution': u'10', u'openslide_level[6]_tile-height': u'256', u'openslide_level[3]_tile-height': u'256', u'openslide_level[6]_height': u'1370', u'openslide_level[7]_tile-height': u'256', u'openslide_level[7]_downsample': u'128.11706906232456', u'openslide_level[3]_downsample': u'8.0003191683384998', u'openslide_level[7]_height': u'685', u'openslide_level[6]_downsample': u'64.030553342045721', u'openslide_level[3]_width': u'9163', u'openslide_level[6]_tile-width': u'256', u'openslide_level-count': u'10', u'openslide_vendor': u'generic-tiff', u'openslide_level[4]_tile-height': u'256', u'openslide_level[1]_tile-height': u'256', u'openslide_level[0]_width': u'73304', u'openslide_level[4]_height': u'5483', u'tiff_YResolution': u'10', u'openslide_level[1]_tile-width': u'256', u'openslide_level[2]_downsample': u'4.0000683900971143', u'openslide_level[5]_tile-width': u'256', u'openslide_level[1]_downsample': u'2.0000113980896801', u'openslide_level[3]_tile-width': u'256', u'openslide_level[8]_width': u'286', u'openslide_level[8]_tile-width': u'256', u'openslide_level[4]_tile-width': u'256', u'openslide_level[1]_height': u'43867', u'openslide_level[4]_downsample': u'16.001511508473552', u'openslide_level[2]_tile-height': u'256', u'openslide_quickhash-1': u'2ea789bc9cad27d06b3eb9d0454f65cd61f8ae2c67687d437be4537462460bfc', u'openslide_level[8]_downsample': u'256.42139001349528', u'openslide_level[2]_height': u'21933', u'openslide_level[5]_height': u'2741', u'openslide_level[5]_tile-height': u'256', u'openslide_level[9]_height': u'171', u'openslide_level[2]_width': u'18326', u'openslide_level[9]_tile-height': u'256', u'openslide_level[9]_downsample': u'512.84278002699057', 
u'openslide_level[0]_downsample': u'1', u'openslide_level[0]_tile-width': u'256', u'tiff_ResolutionUnit': u'centimeter', u'openslide_level[5]_width': u'2290', u'openslide_level[2]_tile-width': u'256', u'openslide_level[6]_width': u'1145', u'openslide_level[7]_width': u'572', u'openslide_level[7]_tile-width': u'256', u'openslide_level[3]_height': u'10966', u'openslide_level[4]_width': u'4581', u'openslide_level[9]_width': u'143', u'openslide_level[5]_downsample': u'32.009435723742172', u'openslide_level[0]_height': u'87735', u'openslide_level[9]_tile-width': u'256', u'openslide_level[8]_height': u'342'}, u'pyramid_w_path': u'/PYRAMIDS/PYRAMIDS/CDSA/DLBC_Frozen/intgen.org_DLBC.tissue_images.Level_1.248.0.0/TCGA-FA-8693-01A-01-BS1.263ab76a-8168-4f72-8147-8c2a44d0948b.svs.dzi.tif', u'file_size': 1039165120, u'conversion_info': u'TBD', u'_id': ObjectId('55e092732f9b2e67e17ee9d9')}